%matplotlib notebook
#Remove if not in Jupyter notebook
#Preamp
import numpy as np
import pandas as pd
pd.options.display.float_format = '{:,.2f}'.format
import matplotlib.pyplot as plt
from matplotlib import style
style.use('seaborn-whitegrid')
#print(plt.style.available)
import statsmodels.api as sm
import plotly
# NOTE(review): hard-coded API credentials committed to source — revoke this key
# and load credentials from an environment variable or a local config file instead.
plotly.tools.set_credentials_file(username='Jorgen.c.r', api_key='OGlmTbHeDHk4KZ214hLY')
from matplotlib.ticker import FuncFormatter
import pandas_datareader.data as web
import datetime as dt
import statsmodels.tools
# Location of the main instrument dataset.
# NOTE(review): non-raw Windows path; '\d' and '\A' survive as literal characters
# here, but a raw string r'C:\data\...' would be safer.
path = str('C:\data\Assignment\data.xlsx') #This is the path from where I retrieve my data
# Had trouble obtaining data for Nikkei225 from Reuters, so I'll use
# Yahoo Finance API
start = dt.datetime(2010,1,1) # Defining a start date for my web request
end = dt.datetime(2017,3,1) # Defining a end date for my web request
nikkei = web.DataReader('^N225', 'yahoo', start, end) # Dataframe with values for the Nikkei225 index
# NOTE(review): 'parse_cols' and 'sheetname' are pre-0.21 pandas keywords; modern
# pandas uses 'usecols' and 'sheet_name'.
df = pd.read_excel(path,parse_cols="F:T",index_col=0, header=2, sheetname="Sheet2") # Main dataframe for all other instruments
# To ensure that I have data for all instruments, I slice my dataframe to align the dataframe
# NOTE(review): .ix was removed in modern pandas — .iloc[0:1294] looks like the
# intended positional slice; confirm before migrating.
df = df.ix[0:1294]
# Left-join the Nikkei close prices onto the main frame by date index.
df = df.merge(nikkei, left_index = True, right_index= True, how='left' )
# Drop the unneeded Yahoo columns (Open/High/Low/Adj Close/Volume etc.) by position.
df.drop(df.columns[[2,14,15,16,18,19]], axis=1, inplace=True)
df.rename(columns={'Close': '.N225'}, inplace=True)
df=df.fillna(method='ffill') # As there are some missing values due to holidays etc. I fill those values from last valid observation
# Importing key dates and assigning them as a pandas Series
# NOTE(review): the read_excel result below is immediately overwritten by the
# next line — this read is dead code; verify whether it was meant to be kept.
dates = pd.read_excel(path, parse_cols="F", skiprows = 2, header=0, sheetname="Sheet2")
dates = pd.Series(df.index)
After importing all my data, I end up with a dataframe that looks like this:
df.head()  # notebook-style preview of the assembled dataframe
# Log-return helper.
def calculate_logreturns(dataframe):
    """Return the element-wise log ratio of each row to the NEXT row.

    Equivalent to np.log(df / df.shift(-1)); the forward shift suggests the
    source data is stored newest-first — TODO confirm against the raw sheet.
    """
    next_row = dataframe.shift(-1)
    return np.log(dataframe.div(next_row))
# Function for importing key dates:
keypath = str('C:/Users/Christopher/Dropbox/2. ICMA Centre/2. Module/ICM142 - Programming for Finance/Assignment/keydates.xlsx')
def import_keydates(pathkey):
    """Read key event dates from column A of an Excel sheet.

    Parameters:
        pathkey: path to the Excel workbook holding one date per row.
    Returns:
        list of pandas Timestamps, in sheet order.
    """
    temp1 = pd.read_excel(pathkey, names=['Dates'], parse_cols="A")
    # Convert every cell to a Timestamp; comprehension replaces the old
    # index-based append loop with identical results.
    return [pd.to_datetime(value) for value in temp1['Dates']]
# Iterating through key dates to assign dummy variables:
def iteration(input_list, key_list, assigned_variable):
    """Build a list of dummy variables flagging pre-defined key dates.

    Parameters:
        input_list: pandas Series of dates with a default 0..n-1 index.
        key_list: iterable of the key event dates.
        assigned_variable: value (coerced to int) emitted for a key date.
    Returns:
        list of ints, `assigned_variable` where the date is a key date, else 0.
    """
    key_set = set(key_list)   # O(1) membership instead of a linear scan per date
    flag = int(assigned_variable)  # hoist the conversion out of the loop
    dummy = []
    for i in range(0, len(input_list)):
        dummy.append(flag if input_list.loc[i] in key_set else 0)
    return dummy
# Function for localizing index values of my key dates in the dataframe
def index_keys(inputlist, frame=None):
    """Return sorted positional index values of the key dates.

    Helpful when computing cumulative returns around events.

    Parameters:
        inputlist: iterable of index labels (key dates).
        frame: DataFrame whose index is searched; defaults to the module-level
            df_return for backward compatibility with existing callers.
    Returns:
        list of integer positions, ascending.
    """
    if frame is None:
        frame = df_return  # original behavior: look up in the global returns frame
    return sorted(frame.index.get_loc(item) for item in inputlist)
# Cumulative list creator
def cumulative_list(index_list, start, end):
    """Build event windows of positional indices from T+start to T+end.

    Parameters:
        index_list: iterable of integer event positions (from index_keys).
        start, end: inclusive window offsets (coerced to int), e.g. -5 and 5.
    Returns:
        list of lists; one window of consecutive positions per event.
    """
    lo, hi = int(start), int(end)
    # One comprehension replaces the nested append loops with identical output.
    return [list(range(center + lo, center + hi + 1)) for center in index_list]
def plotting(Dataframes, xlabel, ylabel, title):
    """Plot the given frame and label the axes/title.

    Note: the returned object is the legend handle produced by .legend(),
    not the Figure itself (kept for backward compatibility).
    """
    legend_handle = Dataframes.plot(figsize=(20,10)).legend(bbox_to_anchor=(1.1, 0.5))
    plt.xlabel(str(xlabel))
    plt.ylabel(str(ylabel))
    plt.title(str(title))
    return legend_handle
def index_plotting(list_in):
    """Rebase a single series so the earliest observation equals 1.0.

    Assumes the input is a single pandas Series of price/level data.

    Fixes vs. the original: the caller's Series is no longer mutated
    (sort_index was applied in place on the input), and positional access
    uses .iloc instead of the removed integer-[] fallback.

    Parameters:
        list_in: pandas Series indexed by date.
    Returns:
        DataFrame of values divided by the first (earliest) value.
    """
    ordered = list_in.sort_index(ascending=True)  # returns a copy; input untouched
    base = ordered.iloc[0]
    rebased = [ordered.iloc[i] / base for i in range(len(ordered))]
    return pd.DataFrame(rebased)
def Q5_appender(event, frame=None):
    """Classify each instrument's return on a key event date.

    Parameters:
        event: index label of the key event date.
        frame: returns DataFrame to look the event up in; defaults to the
            module-level df_return for backward compatibility.
    Returns:
        list of 'Positive'/'Negative' strings, one per instrument
        (the 'Dummy Variables' entry is excluded; zero counts as 'Negative',
        matching the original's strict `> 0` test).
    """
    if frame is None:
        frame = df_return
    # Row of returns for the event; drop the dummy column if present
    # (replaces the original del-inside-a-loop-with-break construct).
    returns = frame.loc[event].drop('Dummy Variables', errors='ignore')
    return ['Positive' if value > 0 else 'Negative' for value in returns]
# Column-wise summary statistics, returned as single-row frames.
def avgstdmedian(df):
    """Return (std, mean, median) of each column, each as a 1-row DataFrame.

    The row index of each result is 0, so callers can append and rename it.
    Standard deviation uses the sample estimator (ddof=1).
    """
    std = df.std(axis=0, ddof=1).to_frame().T
    average = df.mean(axis=0).to_frame().T
    median = df.median(axis=0).to_frame().T
    return std, average, median
df_return = calculate_logreturns(df) # Creating a dataframe consisting of my log returns for all instruments
key_dates = import_keydates(keypath) # Importing all identified key dates from my excel sheet
dummy_variables = iteration(dates,key_dates,1) # Constructing a indicator function using dummy variables with the value of 1
# Attach the event indicator as an extra column on the returns frame.
df_return['Dummy Variables'] = dummy_variables
# Positional locations of the key dates within df_return's index.
Index_keys = index_keys(key_dates)
# Default event windows: five trading days either side of each event.
List_for_cumulative_returns = cumulative_list(Index_keys,start=-5,end=5)
In this section, I only consider the British financial markets. I create another dataframe in order to derive estimates for the median, average and standard deviation of the different instruments.
I start off by creating a new dataframe in which I only include instruments for the UK:
# Restrict the returns frame to key-event rows (dummy == 1)...
FTSE_key_events_df = df_return[df_return['Dummy Variables']==1]
# ...and to the UK instruments only.
FTSE_key_events_df = FTSE_key_events_df[['.FTSE', 'GB 10Y GILT', 'GB 7Y GILT', 'GB 2Y GILT', 'GB 3M T-BILL','GBPEUR', 'GBPUSD']]
FTSE_key_events_df.sort_index(ascending=True, inplace = True)
FTSE_key_events_df.head(n=10)  # notebook-style preview
Then, I calculate the standard deviation, average and median for the new dataframe. To include the calculated values in the already existing dataframe, I need to add the vectors consisting of the values as rows and rename their indices. This is achieved utilizing my previously defined function:
# Unpack the three 1-row summary frames for the UK key-event returns.
[std, average, median] = avgstdmedian(FTSE_key_events_df)
After appending the values to the dataframe, I rename their index as follows:
# NOTE(review): DataFrame.append was removed in pandas 2.0 — these would need
# pd.concat([...]) when upgrading.
FTSE_key_events_df = FTSE_key_events_df.append(std)
FTSE_key_events_df = FTSE_key_events_df.rename(index={0: 'Standard dev'}) # After appending the row, the index is automatically
# assumed to be '0', which I can use to rename the value
FTSE_key_events_df = FTSE_key_events_df.append(average)
FTSE_key_events_df = FTSE_key_events_df.rename(index={0: 'Average return'})
FTSE_key_events_df = FTSE_key_events_df.append(median)
FTSE_key_events_df = FTSE_key_events_df.rename(index={0: 'Median'})
This yields a dataframe with returns for all identified key dates, along with the calculated standard deviation, average and median for these dates:
pivoted = np.round(FTSE_key_events_df*100, decimals=2) # Rounds to two decimals, and multiplies by 100 for presentation purposes only
pivoted  # notebook-style display of the result table
#pivoted.to_excel('FTSE_Key_dates.xlsx') # This is done to export the dataframe to Excel and subsequently LaTeX
The next step is to calculate a correlation matrix. This is also done using pandas' own functions:
# Pairwise correlations between the UK instruments and the event dummy.
correlation_table = df_return[['.FTSE', 'GB 10Y GILT', 'GB 7Y GILT', 'GB 2Y GILT', 'GB 3M T-BILL','GBPEUR', 'GBPUSD', 'Dummy Variables']]
correlation_table = correlation_table.corr()
correlation_table = np.round(correlation_table, decimals = 3)
correlation_table  # notebook-style display
#correlation_table.to_excel('CorrelationQ1.xlsx')
This process is now repeated, only now I'm doing it for the international stock markets:
# Same event-day summary table, now for the international equity indices.
Q2 = df_return[df_return['Dummy Variables']==1]
Q2 = Q2[['.FTSE', '.HSCI', '.FTMIB', '.MCX', '.FCHI', '.SPX', '.GDAXI', '.N225']]
Q2 = Q2.sort_index()
[std, average, median] = avgstdmedian(Q2)
# NOTE(review): DataFrame.append was removed in pandas 2.0 — use pd.concat when upgrading.
Q2 = Q2.append(std)
Q2 = Q2.rename(index={0: 'Standard dev'})
Q2 = Q2.append(average)
Q2 = Q2.rename(index={0: 'Average return'})
Q2 = Q2.append(median)
Q2 = Q2.rename(index={0: 'Median'})
Q2 = np.round((Q2*100),decimals=2)  # percent, two decimals, presentation only
Q2  # notebook-style display
# Q2.to_excel('InternationalMarketReactions.xlsx')
# Correlations between the international indices and the event dummy.
Q2_corr = df_return[['.FTSE', '.HSCI', '.FTMIB', '.MCX', '.FCHI', '.SPX', '.GDAXI', '.N225', 'Dummy Variables']]
Q2_corr = Q2_corr.corr()
Q2_corr = np.round(Q2_corr, decimals = 3)
Q2_corr  # notebook-style display
#Q2_corr.to_excel('CorrelationQ2.xlsx')
To run all the different regressions, I have chosen to wrap all regression results in a single dictionary as follows:
# OLS of each UK instrument's returns on a constant plus the event dummy.
regression1 = {} # instrument name -> fitted statsmodels results
Ydata = FTSE_key_events_df.columns # Retrieves a list of the different instruments for use
xdata = df_return['Dummy Variables'][:-1] # Excluding the last observation to align the indicator vector with the dataframe
for item in Ydata:
    # (removed the dead `results = 0` pre-assignment — results is always rebound below)
    ydata = df_return[item][:-1]
    results = sm.OLS(ydata, statsmodels.tools.add_constant(xdata)).fit()
    regression1[item] = results
# All regression results are now stored in the dictionary and can easily be accessed:
regression1['GB 10Y GILT'].summary()
# The regression results are then transferred to Excel using Rodeo's copy function
The same procedure is employed for international markets:
# Same regressions for the international markets.
# NOTE(review): Q2.columns here includes the appended stat rows' frame columns only,
# which matches the instrument list — but Q2 itself was already mutated above; verify.
regression2 = {} # instrument name -> fitted statsmodels results
Ydata = Q2.columns # Retrieves a list of the different instruments for use
xdata = df_return['Dummy Variables'][:-1] # Excluding the last observation to align the indicator vector with the dataframe
for item in Ydata:
    # (removed the dead `results = 0` pre-assignment — results is always rebound below)
    ydata = df_return[item][:-1]
    results = sm.OLS(ydata, statsmodels.tools.add_constant(xdata)).fit()
    regression2[item] = results
# All regression results are now stored in the dictionary and can easily be accessed:
regression2['.HSCI'].summary()
When defining the function 'cumulative_list', I included two parameters which allow me to easily change the time-range for my intervals. I construct a figure and save it to a pre-specified path using iterations. I have defined "i" as a user-specified variable to allow quick alteration between key event dates. NEED TO RE-RUN THIS TO ENSURE CORRECT DATES ETC!!!!!
When creating the figures, bbox_inches="tight" is used to eliminate all unnecessary whitespace around the figures. First, I define two variables which make the process easier. 'tt' is a variable that controls how many days before the key event the cumulative window begins. 't' defines where the for loop stores the created figures:
# Produce and save one cumulative-return figure per key event for the UK
# instruments. 'tt' sets how many days before the event the window starts
# (window = T-tt .. T+5); 't' names the output sub-folder for that window.
for tt in range (0,6):
    t = 'T'+str(tt)
    List_for_cumulative_returns = cumulative_list(Index_keys, start = -tt, end = 5)
    for j in range (0,(len(key_dates))):
        i = j  # 'i' selects which key event's window is plotted
        # NOTE(review): .ix was removed in modern pandas; these window entries are
        # positional integers, so .iloc looks like the replacement — confirm.
        xdata = df_return['.FTSE'].ix[List_for_cumulative_returns[i]].index.sort_values(ascending = True)
        # Cumulative log returns over the event window, scaled to percent.
        ydata0 = df_return['.FTSE'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata1 = df_return['GB 10Y GILT'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata2 = df_return['GB 7Y GILT'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata3 = df_return['GB 2Y GILT'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata4 = df_return['GB 3M T-BILL'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata5 = df_return['GBPEUR'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata6 = df_return['GBPUSD'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        fig = plt.figure(figsize=(20,10))
        ax1 = plt.subplot2grid((1,1),(0,0))
        ax1.plot_date(xdata, ydata0, '-', label='FTSE100')
        ax1.plot_date(xdata, ydata1, '-', label='10Y Gilt')
        ax1.plot_date(xdata, ydata2, '-', label='7Y Gilt')
        ax1.plot_date(xdata, ydata3, '-', label='2Y Gilt')
        ax1.plot_date(xdata, ydata4, '-', label='3M T-Bill')
        ax1.plot_date(xdata, ydata5, '-', label='GBPEUR')
        ax1.plot_date(xdata, ydata6, '-', label='GBPUSD')
        fig.autofmt_xdate()
        #plt.xlabel('Dates')
        plt.ylabel('Cumulative return')
        # NOTE(review): 'key_dates[6-i]' assumes exactly seven key dates — verify
        # it stays aligned if the key-date list changes.
        plt.title('Cumulative return for key date\n'+str(key_dates[6-i].date()))
        plt.subplots_adjust(left = 0.05, bottom = 0.1, right = .65, top = 0.95, wspace = 0.0, hspace = 0)
        box = ax1.get_position()
        # Shrink the axes vertically so the legend fits below the plot.
        ax1.set_position([box.x0, box.y0 + box.height * 0.1,
                          box.width, box.height * 0.9])
        ax1.legend(loc='lower center', bbox_to_anchor=(0.5, 0),
                   fancybox=True, shadow=True, ncol=8)
        ax1.xaxis.grid(False)
        ax1.yaxis.grid(alpha = 1)
        plt.show() # Might not be necessary to display all the figures
        savepath = 'C:/Users/Christopher/Dropbox/2. ICMA Centre/PROTEXT test/Q4/'+str(t)+'/Q4_1'+str(i+1)+'.png'
        plt.savefig(savepath,transparent = False, dpi = 50, bbox_inches="tight")
# Same figure loop, now for the international equity indices.
for tt in range (0,6):
    t = 'T'+str(tt)
    List_for_cumulative_returns = cumulative_list(Index_keys, start = -tt, end = 5)
    for j in range (0,len(key_dates)):
        i = j  # 'i' selects which key event's window is plotted
        # NOTE(review): .ix was removed in modern pandas; positional window
        # indices suggest .iloc as the replacement — confirm.
        xdata = df_return['.FTSE'].ix[List_for_cumulative_returns[i]].index.sort_values(ascending = True)
        ydata0 = df_return['.FTSE'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata1 = df_return['.HSCI'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata2 = df_return['.FTMIB'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata3 = df_return['.MCX'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata4 = df_return['.FCHI'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata5 = df_return['.SPX'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        ydata6 = df_return['.GDAXI'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        # NOTE(review): ydata7 (.N225) is computed but never plotted below — likely
        # a missing plot_date call; verify intended behavior.
        ydata7 = df_return['.N225'].ix[List_for_cumulative_returns[i]].sort_index(ascending = True).cumsum()*100
        fig = plt.figure(figsize=(20,10))
        ax1 = plt.subplot2grid((1,1),(0,0))
        ax1.plot_date(xdata, ydata0, '-', label='FTSE100')
        ax1.plot_date(xdata, ydata1, '-', label='HSCI')
        ax1.plot_date(xdata, ydata2, '-', label='FTMIB')
        ax1.plot_date(xdata, ydata3, '-', label='MCX')
        # NOTE(review): label 'FHCI' appears to be a typo for the '.FCHI' (CAC 40) series.
        ax1.plot_date(xdata, ydata4, '-', label='FHCI')
        ax1.plot_date(xdata, ydata5, '-', label='SPX')
        ax1.plot_date(xdata, ydata6, '-', label='GDAXI')
        fig.autofmt_xdate()
        #plt.xlabel('Dates')
        plt.ylabel('Cumulative return')
        # NOTE(review): 'key_dates[6-i]' assumes exactly seven key dates — verify.
        plt.title('Cumulative return for key date\n'+str(key_dates[6-i].date()))
        plt.subplots_adjust(left = 0.05, bottom = 0.1, right = .65, top = 0.95, wspace = 0.9, hspace = 0)
        box = ax1.get_position()
        # Shrink the axes vertically so the legend fits below the plot.
        ax1.set_position([box.x0, box.y0 + box.height * 0.1,
                          box.width, box.height * 0.9])
        ax1.legend(loc='lower center', bbox_to_anchor=(0.5, 0),
                   fancybox=True, shadow=True, ncol=8)
        ax1.xaxis.grid(False)
        ax1.yaxis.grid(alpha = 1)
        plt.show()
        savepath = 'C:/Users/Christopher/Dropbox/2. ICMA Centre/PROTEXT test/Q4/'+str(t)+'/Q4_2'+str(i+1)+'.png'
        plt.savefig(savepath,transparent = False, dpi = 50, bbox_inches="tight")